/* Replication of "Can Student Body Diversity Foster Interethnic Trust, Tolerance, and National Identification Prioritization? The Role of Friendship in Kenya"

BJPS 2025

*Input files: BJPS_blecketal2025_anon.dta
*Output files: 
	-main_regressiontables.doc (main-paper regressions: Tables 2-4)
	-regressiontables.doc (all regressions)
	-replicationtables.xlsx (all other tables: balance tests, means, sensitivity, etc.)
	-eth_het_friends_v2.png : Figure 3
*Helper do-file (run at the start of this script): programs.do
*/
* Working directory: edit this path so that it points at the local replication folder.
cd "G:\My Drive\Projects--Closed\Ford Program\Kenya Education Project\Replication\"
* Run the companion do-file first, then load the analysis dataset into memory.
do "programs.do"
use "BJPS_blecketal2025_anon.dta", clear
*******************************************************
*	MAIN RESULTS: Tables and Figures in Main Paper, in order of appearance
*******************************************************

*Table 1 : Student Diversity, by School Type
	* Calls balancetablesch (not defined in this file; presumably defined by programs.do -- confirm)
	* on the observations flagged by tagsch, with by(national), and directs the result to
	* sheet "Table 1" of replicationtables.xlsx.
	balancetablesch samecty nborcty farcty n*_sch eth_het_sch if tagsch, using("replicationtables.xlsx") sheet(Table 1) by(national)  

	* Reset outreg before any table is built (see help outreg for what clear() discards).
	outreg, clear()

*Tables 2 and 3 share one specification: each outcome is regressed on national plus
*controls, weighted by weight3, with standard errors clustered on schoolname.
*Only the coefficient on national is kept in the printed table.
	local tblopts "varlabel se bdec(2) starlevels(10 5 1) starloc(1) keep(national) nocons addrows(County FE, Yes \ Pre-Treatment Controls, Yes)"
	local model "national q16 gender ib6.q4 i.rel i.countysample  [pweight=weight3], vce(cl schoolname)"

*Identity outcomes: stored as outreg table t2 (exported further down as "Table 2")
	local first 1
	foreach dv of varlist identity3 identity4 {
		regress `dv' `model'
		if `first' {
			* first outcome starts the table
			outreg, `tblopts' store(t2)
			local first 0
		}
		else {
			* later outcomes are merged in as additional columns
			outreg, `tblopts' merge(t2) store(t2)
		}
	}

*Tolerance and trust outcomes: stored as outreg table t3 (exported further down as "Table 3")
	local first 1
	foreach dv of varlist q59_comf trustsame q50_ever q51_ever {
		regress `dv' `model'
		if `first' {
			outreg, `tblopts' store(t3)
			local first 0
		}
		else {
			outreg, `tblopts' merge(t3) store(t3)
		}
	}

*Table 4: Diversity of Student's friend group
	* Weighted (weight3) regression of fr_diffeth on national and controls, including
	* mother tongue (ib6.q4), religion (i.rel) and county-sample indicators;
	* standard errors are clustered on schoolname.
	reg fr_diffeth national ingroup2 q16 gender ib6.q4 i.rel i.countysample [pweight=weight3], vce(cl schoolname)  

	* starlevels(10 5 1) is stated explicitly so the stars agree with the note printed
	* under this table ("*** p<0.01, ** p<0.05, * p<0.10") and with the subsample
	* versions of this table (A4, A5), which already request the same three levels.
	* Without it outreg falls back to its default star levels.
	outreg, keep(national ingroup2 q16 gender) ///
		varlabel se bdec(2) starlevels(10 5 1) nocons ///
		addrows(County FE, Yes \ Controls for Mother Tongue and Religion, Yes) ///
		store(t4)

* Notes printed beneath the exported tables: note1 for Tables 2-3, note2 for Table 4.
local note1 "Robust standard errors in parentheses. Pretreatment covariates include: Gender, religion, mother tongue, and prior exposure to diversity (neighbors in home community speak another language).  *** p<0.01, ** p<0.05, * p<0.1"
local note2 "Robust standard errors in parentheses, *** p<0.01, ** p<0.05, * p<0.10"

* Replay the stored tables t2, t3 and t4 into one Word file: the first call replaces
* the file, the following calls add their table to it.
local maindoc main_regressiontables.doc
outreg using `maindoc', replace ///
	replay(t2) title("Table 2: Multivariate Regressions, Identity Outcomes") note("`note1'")
outreg using `maindoc', addtable ///
	replay(t3) title("Table 3: Multivariate Regressions, Student Tolerance and Trust Outcomes") note("`note1'")
outreg using `maindoc', addtable ///
	replay(t4) title("Table 4: Diversity of Student's friend group") note("`note2'")

		
	*Figure 3 (1?): Ethnic Heterogeneity and Friendship with Ethnic Outgroup, by School Type
	* Scatter of fr_diffeth_sch against eth_het_sch with a quadratic fit (qfit) per school
	* type; both layers are restricted to eth_het_sch<.9. The two sample conditions are
	* held in locals so each is written once.
	local natl "national==1 & eth_het_sch<.9"
	local comp "national==0 & eth_het_sch<.9"
	twoway ///
		(scatter fr_diffeth_sch eth_het_sch if `natl', mcolor(black)) ///
		(scatter fr_diffeth_sch eth_het_sch if `comp', mcolor(black)) ///
		(qfit fr_diffeth_sch eth_het_sch if `natl', lcolor(gs10) xscale(r(.2 .85))) ///
		(qfit fr_diffeth_sch eth_het_sch if `comp', lcolor(gs10) lpattern(dash)), ///
		legend(label(1 "National School") label(2 "Comparison School") ///
			label(3 "Line of best fit, National") ///
			label(4 "Line of best fit, Comparison")) ///
		ytitle("Friendship with ethnic outgroup") scheme(s1mono)
	* Write the graph currently in memory to a PNG, overwriting any existing file.
	graph export "eth_het_friends_v2.png", as(png) replace

	
*Table 13 (5?): Average Causal Mediation Effects, Social Identity Outcomes
	* Runs one mediate model per identity outcome, collects coefficients, standard errors
	* and p-values into the collection IdentityModels_pr, and exports the direct, indirect
	* and total effects to sheet Table13 of replicationtables.xlsx.
	* NOTE(review): the header and the sheet name say Table 13, but this section sits in the
	* main-paper part of the script between Table 4 and Table 6 -- confirm the table number.
	*step 1: run mediation models for Identity outcomes
		* Drop all existing collections, then create the one used for this table.
		collect clear
		collect create IdentityModels_pr
		* Covariates entered in both the outcome equation and the mediator equation.
		* NOTE(review): this list includes q26_1 and uses ib77.q4_rc in both equations, whereas
		* the Table 6 mediation models omit q26_1 and use ib6.q4 in the mediator equation --
		* confirm the difference is intended.
		local cov q16 gender ingroup2 q26_1 ib77.q4_rc  i.countysample
		
		* n numbers the models; it is written into the tag as level (1), (2) of dimension model.
		local n=1
		foreach v of varlist identity3 identity4 {
			* Outcome equation is a probit; mediator is fr_diffeth (no model option given,
			* so mediate's default applies); treatment is national. Weighted by weight3,
			* standard errors clustered on schoolname.
			collect _r_b _r_se _r_p, tag(model[(`n')]) name(IdentityModels_pr): ///
				mediate (`v' `cov', probit) (fr_diffeth `cov') ///
				(national) [pweight=weight3], all vce(cluster schoolname)
			local ++n 
			}

	*Styling the Table 
	*combining means and SE to 1 cell
		* Standard errors are shown in square brackets; stars are attached to the coefficient
		* at the 0.01 / 0.05 / 0.1 levels; meanse is the composite of coefficient and SE.
		collect style cell result[_r_se], sformat("[%s]")
		collect stars _r_p 0.01 "***" 0.05 "**" 0.1 "*", attach(_r_b) shownote
		collect composite define meanse = _r_b _r_se,  trim

	*Labeling items
		collect label levels coleq NIE "Indirect Effect" TE "Total Effect" NDE "Direct Effect" 
		collect label levels model (1) "Ranks Nationality Above Ethnicity and Country" ///
			(2) "Ranks Ethnicity over Nationality and African Identity", modify
		* Two-decimal numeric format, centered cells and centered column headers.
		collect style cell result, nformat(%6.2f) halign(center) 
		collect style cell cell_type[column-header], halign(center)
	*this tells the columns to have the label of model, not the #s
		collect style header model, level(label)
	*this gets rid of the label of the statisic (previously was displaying "Coefficient-Standard Error")
		collect style header result, level(hide)  
	*this creates the table I want:
		* Rows: NDE, NIE, TE (each showing the meanse composite); columns: one per model.
		collect layout (coleq[NDE NIE TE]#result[meanse]) (model), name(IdentityModels_pr)
	*this exports it to excel
		collect export replicationtables.xlsx, name(IdentityModels_pr) sheet(Table13, replace) modify


*Table 6: Average Causal Mediation Effects, Trust and Tolerance Outcomes
	* Four mediate models (treatment national, mediator fr_diffeth, probit outcome equation),
	* collected into TrustModels_pr as model levels (1)-(4) and exported to sheet Table6.
	* NOTE(review): every outcome equation uses ib77.q4_rc while every mediator equation uses
	* ib6.q4 -- confirm that the two different mother-tongue codings are intended.
	*step 1: run mediation models for Trust outcomes
		* Drop all existing collections, then create the one used for this table.
		collect clear
		collect create TrustModels_pr
		* Model (1): q59_comf. This is the only model that adds q56_comf as a covariate
		* (in both equations).
		collect _r_b _r_se _r_p, tag(model[(1)]) name(TrustModels_pr): ///
			mediate (q59_comf q56_comf ingroup2 q16 gender ib77.q4_rc  i.countysample, probit) ///
			(fr_diffeth q56_comf ingroup2 q16 gender ib6.q4  i.countysample) ///
			(national) [pweight=weight3], all vce(cluster schoolname)

		* Model (2): trustsame.
		collect _r_b _r_se _r_p, tag(model[(2)]) name(TrustModels_pr): /// 
			mediate (trustsame ingroup2 q16 gender ib77.q4_rc  i.countysample, probit) ///
			(fr_diffeth ingroup2 q16 gender ib6.q4  i.countysample) ///
			(national) [pweight=weight3], all vce(cluster schoolname)

		* Model (3): q51_ever (labelled "Lent possessions ever" below).
		collect _r_b _r_se _r_p, tag(model[(3)]) name(TrustModels_pr): ///
			mediate (q51_ever ingroup2 q16 gender ib77.q4_rc  i.countysample, probit) ///
			(fr_diffeth ingroup2 q16 gender ib6.q4  i.countysample) ///
			(national) [pweight=weight3], all vce(cluster schoolname)

		* Model (4): q50_ever (labelled "Lent money ever" below).
		collect _r_b _r_se _r_p, tag(model[(4)]) name(TrustModels_pr): ///
			mediate (q50_ever ingroup2 q16 gender ib77.q4_rc  i.countysample, probit) ///
			(fr_diffeth ingroup2 q16 gender ib6.q4  i.countysample) ///
			(national) [pweight=weight3], all vce(cluster schoolname)

	*Styling the Table 
	*combining means and SE to 1 cell
		* Standard errors are shown in square brackets; stars are attached to the coefficient
		* at the 0.01 / 0.05 / 0.1 levels; meanse is the composite of coefficient and SE.
		collect style cell result[_r_se], sformat("[%s]")
		collect stars _r_p 0.01 "***" 0.05 "**" 0.1 "*", attach(_r_b) shownote
		collect composite define meanse = _r_b _r_se,  trim

	*Labeling items
		collect label levels coleq NIE "Indirect Effect" TE "Total Effect" NDE "Direct Effect" 
		collect label levels model (1) "Comfortable marrying member of another ethnic group" ///
			(2) "Trust other ethnic groups same as own" (3) "Lent possessions ever" ///
			(4) "Lent money ever", modify
	 
		* Two-decimal numeric format, centered cells and centered column headers.
		collect style cell result, nformat(%6.2f) halign(center) 
		collect style cell cell_type[column-header], halign(center)
	*this tells the columns to have the label of model, not the #s
		collect style header model, level(label)
	*this gets rid of the label of the statisic (previously was displaying "Coefficient-Standard Error")
		collect style header result, level(hide)  
	*this creates the table I want:
		* Rows: NDE, NIE, TE (each showing the meanse composite); columns: one per model.
		collect layout (coleq[NDE NIE TE]#result[meanse]) (model), name(TrustModels_pr)
	*this exports it to excel
		collect export replicationtables.xlsx, name(TrustModels_pr) sheet(Table6, replace) modify


*******************************************************
*	APPENDIX: Tables and Figures in Appendix, in order of appearance
*******************************************************
*Table A1: Balance Tests, Student Demographics
	* The variables listed in pctvars are multiplied by 100 before the balance table is
	* produced and divided by 100 again afterwards, so the rest of the script sees them
	* on their original scale. The list is held in one local so both passes use it.
	local pctvars q45_1-q45_97 q67 q16 q17 moved q21 q27_1 q27_5 q27_6 q27_9 ///
		q28 q31_1 q32national q33_1 q33_5 samecty nborcty farcty rel_* eth_* ///
		ppi_nplscore q26_1-q26_6

	* pass 1: scale up
	foreach x of varlist `pctvars' {
		replace `x' = `x'*100
	}

	balancetable q1 rel_1 rel_2 rel_3 ppi_nplscore eth_* q80 moved q16 q17, ///
		using("replicationtables.xlsx") sheet(Table A1) by(national)

	* pass 2: scale back down
	foreach x of varlist `pctvars' {
		replace `x' = `x'/100
	}

*Table A2: Balance Tests, School Observation
	* Calls balancetablesch (not defined in this file) on the observations flagged by tagsch,
	* with by(national); output goes to sheet "Table A2" of replicationtables.xlsx.
	balancetablesch studentpop beds_ct bed3_ct bathrooms_ct bathrooms_condG ///
		showers_condG diningarea_condG diningtables_condG labs_ct library_condG ///
		books_condG computers_ct computers_condG  studyarea_yn counselors_ct nurse_condG  ///
		sportsfacilities_ct clubs_ct q10pr_ct q7pr if tagsch, ///
		using("replicationtables.xlsx") sheet(Table A2) by(national) 

*Table A3: Balance Tests: School Choice and School Reputation
	* Calls balancetable with by(national); output goes to sheet "Table A3".
	* NOTE(review): most of these variables are in the x100 rescaling list used for Table A1,
	* but that rescaling has already been undone at this point, so Table A3 is computed on
	* the original scale -- confirm this is intended.
	balancetable q26_1 q26_2 q26_4 q27_1 q27_5 q27_6 q27_9 q28 q31_1 q32national ///
		q33_5 q44better nbenefits q45_1-q45_97 , using("replicationtables.xlsx") ///
		sheet(Table A3) by(national)

*Tables A4 and A5: Diversity of Student's friend group, estimated separately for
	*students assigned to attend the school by the Ministry of Education (q26_1==1, Table A4)
	*and for all other students (q26_1!=1, Table A5).
	* Both tables use the same right-hand side and the same outreg layout, so each is
	* written once here.
	local rhs national ingroup2 q16 gender ib6.q4 i.countysample
	local layout varlabel se bdec(2) starlevels(10 5 1) keep(national ingroup2 q16 gender) ///
		nocons addrows(County FE, Yes \ Pre-Treatment Controls, ///
		Yes \ Controls for Mother Tongue, Yes)

	* Table A4: assigned students, stored as ta4
	regress fr_diffeth `rhs' if q26_1==1 [pweight=weight3], vce(cl schoolname)
	outreg, `layout' store(ta4)

	* Table A5: students not assigned, stored as ta5
	regress fr_diffeth `rhs' if q26_1!=1 [pweight=weight3], vce(cl schoolname)
	outreg, `layout' store(ta5)

*Tables A6 and A7: outcomes among students assigned to the school by the
	*Ministry of Education (q26_1==1)
* Same specification as the main-paper outcome regressions (national plus controls,
* weight3 weights, SEs clustered on schoolname), restricted to the assigned subsample.
	local tblopts "varlabel se bdec(2) starlevels(10 5 1) starloc(1) keep(national) nocons addrows(County FE, Yes \ Pre-Treatment Controls, Yes)"
	local model "national q16 gender ib6.q4 i.rel i.countysample if q26_1==1 [pweight=weight3], vce(cl schoolname)"

*Table A6: Student Identity Outcomes (stored as ta6)
	local first 1
	foreach dv of varlist identity3 identity4 {
		regress `dv' `model'
		if `first' {
			* first outcome starts the table
			outreg, `tblopts' store(ta6)
			local first 0
		}
		else {
			* later outcomes are merged in as additional columns
			outreg, `tblopts' merge(ta6) store(ta6)
		}
	}

*Table A7: Student Trust and Tolerance Outcomes (stored as ta7)
	local first 1
	foreach dv of varlist q59_comf trustsame q50_ever q51_ever {
		regress `dv' `model'
		if `first' {
			outreg, `tblopts' store(ta7)
			local first 0
		}
		else {
			outreg, `tblopts' merge(ta7) store(ta7)
		}
	}
	
*Tables A8 and A9: outcomes among students NOT assigned to the school by the
	*Ministry of Education (q26_1!=1)
* Same specification as the main-paper outcome regressions (national plus controls,
* weight3 weights, SEs clustered on schoolname), restricted to the non-assigned subsample.
	local tblopts "varlabel se bdec(2) starlevels(10 5 1) starloc(1) keep(national) nocons addrows(County FE, Yes \ Pre-Treatment Controls, Yes)"
	local model "national q16 gender ib6.q4 i.rel i.countysample if q26_1!=1 [pweight=weight3], vce(cl schoolname)"

*Table A8: Student Identity Outcomes (stored as ta8)
	local first 1
	foreach dv of varlist identity3 identity4 {
		regress `dv' `model'
		if `first' {
			* first outcome starts the table
			outreg, `tblopts' store(ta8)
			local first 0
		}
		else {
			* later outcomes are merged in as additional columns
			outreg, `tblopts' merge(ta8) store(ta8)
		}
	}

*Table A9: Student Trust and Tolerance Outcomes (stored as ta9)
	local first 1
	foreach dv of varlist q59_comf trustsame q50_ever q51_ever {
		regress `dv' `model'
		if `first' {
			outreg, `tblopts' store(ta9)
			local first 0
		}
		else {
			outreg, `tblopts' merge(ta9) store(ta9)
		}
	}
		
		

*Table A10: Summary Statistics of all Dependent Variables
	*A short program to combine all mean values
	* Any earlier definition of the program and any earlier frame called means are
	* dropped first (capture ignores the error when they do not exist), then an empty
	* frame means is created to receive the results.
	cap program drop mytable_means
	cap frames drop means
	frames create means
	* mytable_means varlist
	*   For each variable, runs "svy: mean" through parmby and saves the resulting
	*   one-variable results dataset to a temporary file; then switches to frame means,
	*   appends all temporary files there and drops the columns not needed in the table.
	*   On exit the current frame is means (the caller switches back to default).
	*   NOTE(review): svy: needs the survey design to be svyset beforehand; no svyset is
	*   visible in this file -- presumably done in programs.do or stored in the dataset.
	*   NOTE(review): the tempfile macro name is tf + variable name + means; very long
	*   variable names could exceed Stata's local-macro name length limit -- confirm for
	*   the variables matched by *first.
	program define mytable_means
		syntax varlist, [ *]
		foreach v of varlist `varlist' {
			tempfile tf`v'means  
			qui parmby "svy: mean `v' ", saving(`tf`v'means', replace)  es(N) format(stderr estimate %8.2f) l
			* accumulate the list of temporary files to append below
			local filemeans "`filemeans' `tf`v'means'"
			}

	*combine tempfiles to different frames. 
	frames change means 
	append using `filemeans'	
	drop dof t p parm min max
	 end
	 
	 * Build the table of means, then export the contents of frame means (the current
	 * frame after the program returns) to sheet TableA10.
	 mytable_means q59 q59_comf trustsame q50_ever q51_ever *first  identity3 identity4 
	 export excel using "replicationtables.xlsx" , sheet(TableA10, replace) 
	
	 * Return to the frame holding the analysis data.
	 frames change default

*Table A11: Rho and Variance statistics for all mediation models
	* For each mediation model, medsens is run and three of its returned results
	* (r(errcr), r(r2t_thresh), r(r2s_thresh)) are stored in the collection sensitivity
	* under model levels (1)-(6), which are labelled and exported at the end.
	collect clear
	collect create sensitivity
	*Model (1), Tolerance : has 1 additional covariate (parents' opinions about marriage to outgroup)
	*Observations with a missing value on any model variable are dropped before medsens is
	*called (this command really doesn't like missing's!). preserve/restore brings the full
	*dataset back afterwards; nothing is written to disk in this section.
	preserve 
		foreach v of varlist national fr_diffeth  ingroup2 q16 gender q4_rc_1-q4_rc_6  c_1-c_4 q56_comf q59_comf{
		drop if `v'==.
		} 
		medsens (regress fr_diffeth national q56_comf ingroup2 q16 gender q4_rc_1-q4_rc_6  c_1-c_4) ///
			(probit q59_comf national fr_diffeth q56_comf ingroup2 q16 gender q4_rc_1-q4_rc_6  c_1-c_4), ///
			mediate(fr_diffeth) treat(national) sims(10000) 
		collect get rho= (r(errcr)) r2t_thr=(r(r2t_thresh)) r2s_thr=(r(r2s_thresh)), ///
			tag(model[(1)]) name(sensitivity)
			
	restore 

	*All subsequent variables have the same covariates, can run in a loop
	*The counter starts at 2 because model (1) is the tolerance model collected above.
	*With this start the loop order (trustsame, q51_ever, q50_ever, identity3, identity4)
	*lines up with labels (2)-(6) defined below; starting at 1 would reuse tag (1) for
	*trustsame, shift every later model by one label and leave level (6) empty.
	local n=2
	foreach v of varlist trustsame q51_ever q50_ever identity3 identity4 {	
	preserve
	* drop observations with a missing value on any variable used in this model
	foreach cov of varlist national fr_diffeth ingroup2 q16 gender q4_rc_1-q4_rc_6 ///
		c_1-c_4 `v'{
		drop if `cov'==.
		}
	medsens (regress fr_diffeth national ingroup2 q16 gender q4_rc_1-q4_rc_6  c_1-c_4) ///
		(probit `v' national fr_diffeth ingroup2 q16 gender q4_rc_1-q4_rc_6  c_1-c_4), ///
		mediate(fr_diffeth) treat(national) sims(10000)
	collect get rho= (r(errcr)) r2t_thr=(r(r2t_thresh)) r2s_thr=(r(r2s_thresh)), ///
		tag(model[(`n')]) name(sensitivity)
	restore
	local ++n
	}	

	* Row labels for the six models and column labels for the three statistics.
	collect label levels model (1) "Comfortable marrying member of another ethnic group" ///
			(2) "Trust other ethnic groups same as own" (3) "Lent possessions ever" ///
			(4) "Lent money ever" (5) "Ranks Nationality Above Ethnicity and Country" ///
			(6) "Ranks Ethnicity over Nationality and African Identity", modify
	collect label levels result rho "Rho" r2t_thr "Total variance in mediator and outcome" r2s_thr "Residual variance in mediator and outcome"
	* One row per model, one column per statistic; exported to sheet TableA11.
	collect layout  (model) (result[rho r2t_thr r2s_thr]), name(sensitivity)
	collect export "replicationtables.xlsx", name(sensitivity) sheet(TableA11, replace) modify
	
*Figures A2-A7 (mediation sensitivity): these are produced by the accompanying R code, not by this do-file

*Table A12: Moderation Analysis: Tolerance, National School Attendance, and Diverse Friendship 
	* Three nested regressions of fr_diffeth (weight3 weights, SEs clustered on schoolname),
	* merged as successive columns of the outreg table ta12:
	*   (1) national only, (2) adding q59_comf, (3) national fully interacted with q59_comf.
	* The three option strings differ only in keep(), i.e. in which coefficients are printed.
	* (The locals title and store that used to be defined here were never referenced and
	* have been removed.)
	local options "varlabel se bdec(2) starlevels(10 5 1) starloc(1) keep(1.national ingroup2 q16 gender) nocons addrows(County FE, Yes \ Controls for Mother Tongue, Yes)"
	local options2 "varlabel se bdec(2) starlevels(10 5 1) starloc(1) keep(1.national 1.q59_comf ingroup2 q16 gender) nocons addrows(County FE, Yes \ Controls for Mother Tongue, Yes)"
	* NOTE(review): keep() here contains a ## term; confirm that outreg expands it so the
	* interaction coefficient 1.national#1.q59_comf appears in the printed column.
	local options3 "varlabel se bdec(2) starlevels(10 5 1) starloc(1) keep(1.national##1.q59_comf ingroup2 q16 gender) nocons addrows(County FE, Yes \ Controls for Mother Tongue, Yes)"

	* Column 1: starts table ta12
	reg fr_diffeth i.national ingroup2 q16 gender ib6.q4 i.countysample [pweight=weight3], vce(cl schoolname) 
	outreg, `options' store(ta12)

	* Column 2: adds q59_comf as a regressor
	reg fr_diffeth i.national i.q59_comf ingroup2 q16 gender ib6.q4 i.countysample [pweight=weight3], vce(cl schoolname)

	outreg, `options2' merge(ta12) store(ta12)

	*Fully interacting to see the moderation effect (should consider the interaction term here)
	reg fr_diffeth i.national##i.q59_comf ingroup2 q16 gender ib6.q4 i.countysample [pweight=weight3], vce(cl schoolname)

	outreg, `options3' merge(ta12) store(ta12) 

*Put all regressions into a single document
* regressiontables.doc is started here with the three main-paper tables (t2, t3, t4):
* the first call replaces the file, the following calls add their table to it.
local note1 "Robust standard errors in parentheses. Pretreatment covariates include: Gender, religion, mother tongue, and prior exposure to diversity (neighbors in home community speak another language).  *** p<0.01, ** p<0.05, * p<0.1"
local note2 "Robust standard errors in parentheses, *** p<0.01, ** p<0.05, * p<0.10"

local alldoc regressiontables.doc
outreg using `alldoc', replace ///
	replay(t2) title("Table 2: Multivariate Regressions, Identity Outcomes") note("`note1'")
outreg using `alldoc', addtable ///
	replay(t3) title("Table 3: Multivariate Regressions, Student Tolerance and Trust Outcomes") note("`note1'")
outreg using `alldoc', addtable ///
	replay(t4) title("Table 4: Diversity of Student's friend group") note("`note2'")
	
	
*Put all appendix files into that same document
	* Each stored appendix table is replayed and added to regressiontables.doc.
	* Titles follow what each stored table actually contains:
	*   ta6 = identity outcomes, assigned students (q26_1==1)
	*   ta7 = trust/tolerance outcomes, assigned students (q26_1==1)
	*   ta8 = identity outcomes, students not assigned (q26_1!=1)
	*   ta9 = trust/tolerance outcomes, students not assigned (q26_1!=1)
	* (The titles for ta7 and ta8 were previously swapped relative to their content.)
	local note "Robust standard errors in parentheses. *** p<0.01, ** p<0.05, * p<0.1"
	outreg using regressiontables.doc, addtable replay(ta4) title("Table A4: Diversity of Student's friend group, among students assigned to attend school by Ministry of Education") note("`note'")
	outreg using regressiontables.doc, addtable replay(ta5) title("Table A5: Diversity of Student's friend group, among students not assigned to attend school by Ministry of Education") note("`note'")
	outreg using regressiontables.doc, addtable replay(ta6) title("Table A6: Student Identity Outcomes, Among students assigned to school by Ministry of Education") note("`note'")
	outreg using regressiontables.doc, addtable replay(ta7) title("Table A7: Student Trust and Tolerance Outcomes, Among students assigned to school by Ministry of Education") note("`note'")
	outreg using regressiontables.doc, addtable replay(ta8) title("Table A8: Student Identity Outcomes, Among students not assigned to school by Ministry of Education") note("`note'")
	outreg using regressiontables.doc, addtable replay(ta9) title("Table A9: Student Trust and Tolerance Outcomes, Among students not assigned to school by Ministry of Education") note("`note'")
	outreg using regressiontables.doc, addtable replay(ta12) title("Table A12: Moderation Analysis: Tolerance, National School Attendance, and Diverse Friendship") note("`note'")